# remove(list = ls())
### loading packages
library(haven)
library(reactable)
library(sf)
library(data.table)
library(saeTrafo)
library(emdi)
library(tidyverse)

Introduction

In this script the SAE models FH and BHF will be calculated.

The Variable selection resulted that we will use this Formula, for further specification of the Models.

aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + v04_revoq_Missing + car_yes + water_heater_yes + kitchen_yes

loading Data

### auxillary data needed to calculate FH model
dat_census_aux <- readRDS("../data_raw/2024_census/processed/census_aux_data_aggregated.RDS")

#### load processed dummy coded census data
dat <- readRDS("../data_raw/2024_census/processed/dat_dummyCoding.RDS")
pop_table <- dat %>% group_by(ID_prov) %>% count()

#### load sample data
### 172 much better for FH
# sample <- readRDS("../data_raw/samples/transformed-2/sample_172.RDS")
sample <- readRDS("../data_raw/samples/sample_for_model_building_transformed.RDS")

sample %>% class()
## [1] "data.table" "data.frame"
sample <- sample %>% as.data.frame()

# FH full nutzt aggregated/shares aus dat_census_aux
fh_formula <- Mean ~ mean_p26_edad + share_ocu_military + share_ocu_manager + 
    share_ocu_professional + share_ocu_technician + share_ocu_adminSupport + 
  share_ocu_serviceSales + share_ocu_agriculture + share_ocu_unskilled + 
  share_sex_male + share_read_knowing + share_urbrur_urban + share_health_insurance_yes + 
  share_interior_plastered_yes + share_v04_revoq_Missing + share_car_yes + share_water_heater_yes + 
  share_kitchen_yes

# BHF nutzt individuelle Dummyvariablen im Sample/Census
bhf_formula <- aestudio ~ p26_edad + ocu_military + ocu_manager + 
    ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + 
    ocu_agriculture + ocu_unskilled + sex_male + read_knowing + 
    urbrur_urban + health_insurance_yes + interior_plastered_yes + 
    v04_revoq_Missing + car_yes + water_heater_yes + kitchen_yes

sample$aestudio %>% hist()

FH model

combine data

direct estimate of sample

We have a simple random sample, which makes calculating the sample variance much easier, and we dont need second order inclusion criterion, nor do we need to use bootsrap or jacknife, to calculate the direct estimate and the domain specific variance.

This is the HT estimator for the totls in a domain:\(\hat{Y}_d = \sum_{i \in s_d} \frac{y_i}{\pi_i}\)

This then is the resultig design-based variance: \[ \text{Var}_\pi(\hat{Y}_d) = \left(1 - \frac{n_d}{N_d}\right) \frac{N_d^2 S_d^2}{n_d} \]

Now we are not interest in the totals, but in the variance of the mean, which results in this formula: \[ \text{Var}_\pi(\bar{y}_d) = \frac{1}{N_d^2} \text{Var}_\pi(\hat{Y}_d) = \left(1 - \frac{n_d}{N_d}\right) \frac{S_d^2}{n_d} \]

# direct_estimates <- sample %>%
#   group_by(ID_prov) %>%
#   summarise(
#     Mean     = mean(aestudio, na.rm = TRUE),
#     n_eff    = sum(!is.na(aestudio)),
#     Var_Mean = var(aestudio, na.rm = T)/n_eff, #### we still have t inlcude FPC
#     .groups  = "drop"
#   )

sample <- sample %>% left_join(pop_table,by = "ID_prov")

direct_estimates <- sample %>%
  group_by(ID_prov) %>%
  reframe(
    Mean     = mean(aestudio, na.rm = TRUE),
    n_eff    = sum(!is.na(aestudio)),
    n = mean(n,na.rm = T),
    Var_Mean = (1 - (n_eff/n)) * var(aestudio, na.rm = T)/n_eff
  )

### recreate direct object
direct_rec <- list(
  "ind" = data.frame("Domain" = direct_estimates$ID_prov,
                     "Mean" = direct_estimates$Mean),
  "MSE" = data.frame("Domain" = direct_estimates$ID_prov,
                     "Mean" = direct_estimates$Var_Mean)
)
class(direct_rec) <- c("direct", "emdi")

# direct_rec %>% class()
# direct %>% class()


# direct <- emdi::direct(y = "aestudio", smp_data = sample ,smp_domains = "ID_prov",var = T,na.rm = T,B = 500)
# saveRDS(direct,"../data_raw/simulation/processed/direct_500_bootstraps.RDS")
direct <- readRDS("../data_raw/simulation/processed/direct_500_bootstraps.RDS")

# tmp_dat <- merge(direct$MSE[,c("Domain","Mean")],direct_estimates[,c("ID_prov","Var_Mean")],by.x = "Domain", by.y = "ID_prov")
# plot(tmp_dat$Mean,tmp_dat$Var_Mean)
# direct$successful_bootstraps

# cor.test(tmp_dat$Mean,tmp_dat$Var_Mean)

combine with auxillary data

combined_data <- merge(direct_estimates,dat_census_aux,by = "ID_prov")

FH Models

FH null model

fh_model_null <- fh(
fixed = Mean ~ 1,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",
MSE = TRUE
)

fh_model_null$MSE$FH %>% mean()
## [1] 0.7744406

FH full model

###  ml, becasue step funcion only works with ml 
fh_model_full_step <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "ml",
MSE = TRUE
)
## Warning in fh(fixed = fh_formula, vardir = "Var_Mean", combined_data =
## combined_data, : The estimate of the variance of the random effects falls at
## the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
fh_model_full_unoptimized <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",
MSE = TRUE
)

fh_model_full_unoptimized$MSE$FH %>% mean()
## [1] 0.3002415
# (sqrt(fh_model_full$MSE$FH)/fh_model_full$ind$FH * 100) %>% mean()

FH step function

### model selection
emdi::step(fh_model_full_step,direction ="both")
## Start: AIC = 358.49
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_manager + 
##     share_ocu_professional + share_ocu_technician + share_ocu_adminSupport + 
##     share_ocu_serviceSales + share_ocu_agriculture + share_ocu_unskilled + 
##     share_sex_male + share_read_knowing + share_urbrur_urban + 
##     share_health_insurance_yes + share_interior_plastered_yes + 
##     share_v04_revoq_Missing + share_car_yes + share_water_heater_yes + 
##     share_kitchen_yes
## Warning in fh(fixed = Mean ~ share_ocu_military + share_ocu_manager +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_manager +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - share_ocu_manager             1 356.51
## - share_interior_plastered_yes  1 356.53
## - share_ocu_unskilled           1 356.64
## - share_water_heater_yes        1 357.11
## - share_ocu_agriculture         1 357.49
## - share_ocu_military            1 357.53
## - share_urbrur_urban            1 358.19
## <none>                            358.49
## - share_v04_revoq_Missing       1 358.68
## - share_ocu_serviceSales        1 358.86
## - share_sex_male                1 359.00
## - share_kitchen_yes             1 359.00
## - share_ocu_technician          1 359.26
## - share_ocu_adminSupport        1 359.58
## - share_car_yes                 1 362.82
## - mean_p26_edad                 1 364.29
## - share_health_insurance_yes    1 365.05
## - share_ocu_professional        1 365.80
## - share_read_knowing            1 376.66
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 356.51
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional + 
##     share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales + 
##     share_ocu_agriculture + share_ocu_unskilled + share_sex_male + 
##     share_read_knowing + share_urbrur_urban + share_health_insurance_yes + 
##     share_interior_plastered_yes + share_v04_revoq_Missing + 
##     share_car_yes + share_water_heater_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ share_ocu_military + share_ocu_professional + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - share_interior_plastered_yes  1 354.55
## - share_ocu_unskilled           1 354.65
## - share_water_heater_yes        1 355.16
## - share_ocu_agriculture         1 355.52
## - share_ocu_military            1 355.66
## - share_urbrur_urban            1 356.23
## <none>                            356.51
## - share_v04_revoq_Missing       1 356.72
## - share_ocu_serviceSales        1 356.86
## - share_sex_male                1 357.01
## - share_kitchen_yes             1 357.02
## - share_ocu_adminSupport        1 357.62
## - share_ocu_technician          1 357.89
## + share_ocu_manager             1 358.49
## - share_car_yes                 1 360.82
## - mean_p26_edad                 1 362.60
## - share_health_insurance_yes    1 363.05
## - share_ocu_professional        1 364.83
## - share_read_knowing            1 374.67
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 354.55
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional + 
##     share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales + 
##     share_ocu_agriculture + share_ocu_unskilled + share_sex_male + 
##     share_read_knowing + share_urbrur_urban + share_health_insurance_yes + 
##     share_v04_revoq_Missing + share_car_yes + share_water_heater_yes + 
##     share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - share_ocu_unskilled           1 352.67
## - share_water_heater_yes        1 353.22
## - share_ocu_agriculture         1 353.53
## - share_ocu_military            1 353.69
## - share_urbrur_urban            1 354.24
## <none>                            354.55
## - share_v04_revoq_Missing       1 354.75
## - share_ocu_serviceSales        1 354.86
## - share_kitchen_yes             1 355.12
## - share_ocu_adminSupport        1 355.62
## - share_sex_male                1 355.74
## - share_ocu_technician          1 355.89
## + share_interior_plastered_yes  1 356.51
## + share_ocu_manager             1 356.53
## - share_car_yes                 1 359.79
## - share_health_insurance_yes    1 361.07
## - share_ocu_professional        1 363.27
## - mean_p26_edad                 1 363.88
## - share_read_knowing            1 373.68
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 352.67
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional + 
##     share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales + 
##     share_ocu_agriculture + share_sex_male + share_read_knowing + 
##     share_urbrur_urban + share_health_insurance_yes + share_v04_revoq_Missing + 
##     share_car_yes + share_water_heater_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - share_water_heater_yes        1 351.36
## - share_ocu_agriculture         1 351.53
## - share_ocu_military            1 351.94
## - share_urbrur_urban            1 352.36
## <none>                            352.67
## - share_v04_revoq_Missing       1 352.90
## - share_ocu_serviceSales        1 352.90
## - share_kitchen_yes             1 353.49
## - share_ocu_adminSupport        1 353.62
## - share_sex_male                1 353.89
## - share_ocu_technician          1 353.89
## + share_ocu_unskilled           1 354.55
## + share_interior_plastered_yes  1 354.65
## + share_ocu_manager             1 354.66
## - share_car_yes                 1 358.71
## - share_health_insurance_yes    1 359.31
## - share_ocu_professional        1 361.33
## - mean_p26_edad                 1 361.89
## - share_read_knowing            1 372.10
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 351.36
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional + 
##     share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales + 
##     share_ocu_agriculture + share_sex_male + share_read_knowing + 
##     share_urbrur_urban + share_health_insurance_yes + share_v04_revoq_Missing + 
##     share_car_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - share_ocu_agriculture         1 350.16
## - share_ocu_military            1 350.32
## - share_v04_revoq_Missing       1 351.33
## <none>                            351.36
## - share_urbrur_urban            1 351.40
## - share_kitchen_yes             1 351.77
## - share_ocu_serviceSales        1 352.20
## - share_ocu_adminSupport        1 352.52
## - share_sex_male                1 352.65
## + share_water_heater_yes        1 352.67
## - share_ocu_technician          1 353.10
## + share_ocu_unskilled           1 353.22
## + share_ocu_manager             1 353.32
## + share_interior_plastered_yes  1 353.32
## - share_car_yes                 1 356.88
## - share_health_insurance_yes    1 358.16
## - mean_p26_edad                 1 361.37
## - share_ocu_professional        1 362.84
## - share_read_knowing            1 370.12
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 350.16
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional + 
##     share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales + 
##     share_sex_male + share_read_knowing + share_urbrur_urban + 
##     share_health_insurance_yes + share_v04_revoq_Missing + share_car_yes + 
##     share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - share_ocu_military            1 348.95
## <none>                            350.16
## - share_ocu_serviceSales        1 350.21
## - share_v04_revoq_Missing       1 350.94
## - share_kitchen_yes             1 351.02
## - share_urbrur_urban            1 351.05
## - share_sex_male                1 351.18
## + share_ocu_agriculture         1 351.36
## + share_water_heater_yes        1 351.53
## - share_ocu_adminSupport        1 351.80
## - share_ocu_technician          1 351.99
## + share_ocu_manager             1 352.09
## + share_ocu_unskilled           1 352.16
## + share_interior_plastered_yes  1 352.16
## - share_car_yes                 1 354.99
## - share_health_insurance_yes    1 356.48
## - mean_p26_edad                 1 359.43
## - share_ocu_professional        1 360.88
## - share_read_knowing            1 368.23
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 348.95
## Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician + 
##     share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male + 
##     share_read_knowing + share_urbrur_urban + share_health_insurance_yes + 
##     share_v04_revoq_Missing + share_car_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## <none>                            348.95
## - share_v04_revoq_Missing       1 349.01
## - share_ocu_serviceSales        1 349.54
## - share_kitchen_yes             1 349.55
## - share_urbrur_urban            1 350.10
## + share_ocu_military            1 350.16
## - share_sex_male                1 350.17
## + share_ocu_agriculture         1 350.32
## - share_ocu_technician          1 350.54
## + share_water_heater_yes        1 350.57
## + share_ocu_manager             1 350.76
## - share_ocu_adminSupport        1 350.79
## + share_ocu_unskilled           1 350.94
## + share_interior_plastered_yes  1 350.94
## - share_car_yes                 1 355.34
## - share_health_insurance_yes    1 355.71
## - mean_p26_edad                 1 358.17
## - share_ocu_professional        1 361.02
## - share_read_knowing            1 370.07
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Call:
##  fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician + 
##     share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male + 
##     share_read_knowing + share_urbrur_urban + share_health_insurance_yes + 
##     share_v04_revoq_Missing + share_car_yes + share_kitchen_yes, 
##     vardir = "Var_Mean", combined_data = combined_data, domains = "ID_prov", 
##     method = "ml", MSE = TRUE)
## 
## Coefficients:
##                             coefficients   std.error t.value   p.value    
## (Intercept)                    9.453181    5.703735  1.6574 0.0974454 .  
## mean_p26_edad                 -0.199955    0.058906 -3.3945 0.0006876 ***
## share_ocu_professional        29.466319    7.653160  3.8502 0.0001180 ***
## share_ocu_technician          65.682717   34.608415  1.8979 0.0577115 .  
## share_ocu_adminSupport      -120.453130   61.268534 -1.9660 0.0493001 *  
## share_ocu_serviceSales         9.465757    5.881384  1.6094 0.1075193    
## share_sex_male               -10.734112    5.985460 -1.7934 0.0729146 .  
## share_read_knowing            14.674394    2.994312  4.9008 9.547e-07 ***
## share_urbrur_urban            -1.665386    0.938380 -1.7747 0.0759400 .  
## share_health_insurance_yes     6.771021    2.275802  2.9752 0.0029277 ** 
## share_v04_revoq_Missing       -6.953541    4.843080 -1.4358 0.1510682    
## share_car_yes                 -4.090169    1.411367 -2.8980 0.0037553 ** 
## share_kitchen_yes             -2.494927    1.547109 -1.6126 0.1068231    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fh_formula_optimized <- Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician +
    share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male +
    share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
    share_v04_revoq_Missing + share_car_yes + share_kitchen_yes


### which is the optimized one
fh_model_full <- fh(fixed = fh_formula_optimized,
    vardir = "Var_Mean", combined_data = combined_data, domains = "ID_prov",
    method = "reml", MSE = TRUE)

fh_model_full$MSE$FH %>% mean()
## [1] 0.225684
# step(fh_model_full,direction ="backward",criteria = "BIC")

FH dignostics

p_dig_FH <- plot(fh_model_full)
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## ℹ The deprecated feature was likely used in the emdi package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?

## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?

p_dig_FH_comp <- emdi::compare_plot(model = fh_model_full,direct = direct_estimates,MSE = T,CV = T)

## Press [enter] to continue
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the emdi package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

p_dig_FH
## $qq_plots
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 
## $density_res

## 
## $density_ran

## 
## $cooks_distance
## [1] NA
## 
## $likelihood
## [1] NA
p_dig_FH_comp
## $scatter_FH

## 
## $line_FH

## 
## $scatter_FH
## NULL
## 
## $line_FH
## NULL
## 
## $scatter_FH
## NULL
## 
## $line_FH
## NULL
## 
## $boxplot_MSE_FH

## 
## $ordered_MSE_FH

## 
## $boxplot_CV_FH

## 
## $ordered_CV_FH

log FH

fh_model_full_log <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",transformation = "log",backtransformation = "bc_crude",
MSE = TRUE
)

plot(fh_model_full_log)

## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?

## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?

fh_model_full_log$MSE$FH %>% mean()
## [1] 0.2094632

BHF model

process data

### create vector with sizo of domains
vec_pop_size <- table(dat$ID_prov)

nrow(dat)
## [1] 7684281
dat_BHF_census <- dat[!is.na(dat$aestudio),]
nrow(dat_BHF_census)
## [1] 7350282
dat_BHF_census <- na.omit(dat_BHF_census)
nrow(dat_BHF_census)
## [1] 7350282
dat_BHF_census$ID_prov <- dat_BHF_census$ID_prov %>% as.factor()
# sapply(dat_BHF_census,FUN = function(x) sum(is.na(x)))

nrow(sample)
## [1] 2260
dat_BHF_sample <- sample[!is.na(sample$aestudio),]
nrow(dat_BHF_sample)
## [1] 2260
dat_BHF_sample <- na.omit(dat_BHF_sample)
nrow(dat_BHF_sample)
## [1] 2260
dat_BHF_sample$ID_prov <- dat_BHF_sample$ID_prov %>% as.factor()
sapply(dat_BHF_sample,FUN = function(x) sum(is.na(x)))
##                        ID_prov                       aestudio 
##                              0                              0 
##                       p26_edad                   ocu_military 
##                              0                              0 
##                    ocu_manager               ocu_professional 
##                              0                              0 
##                 ocu_technician               ocu_adminSupport 
##                              0                              0 
##               ocu_serviceSales                ocu_agriculture 
##                              0                              0 
##               ocu_construction                  ocu_operators 
##                              0                              0 
##                  ocu_unskilled                        ocu_NaN 
##                              0                              0 
##              ocu_1d_13_Missing                     sex_female 
##                              0                              0 
##                       sex_male                   read_knowing 
##                              0                              0 
##               read_not_knowing             read_not_specified 
##                              0                              0 
##                   urbrur_urban                   urbrur_rural 
##                              0                              0 
##           health_insurance_yes            health_insurance_no 
##                              0                              0 
## health_insurance_not_specified              p30b_caja_Missing 
##                              0                              0 
##         interior_plastered_yes          interior_plastered_no 
##                              0                              0 
##              v04_revoq_Missing                        car_yes 
##                              0                              0 
##                         car_no              car_not_specified 
##                              0                              0 
##              v18c_auto_Missing               water_heater_yes 
##                              0                              0 
##                water_heater_no     water_heater_not_specified 
##                              0                              0 
##           v18h_calefon_Missing                    kitchen_yes 
##                              0                              0 
##                     kitchen_no             v12_cocina_Missing 
##                              0                              0 
##                              n 
##                              0

BHF Model

BHF_model_full_unoptimized <-  NER_Trafo(fixed = bhf_formula
  ,smp_domains = "ID_prov"
  , smp_data = dat_BHF_sample
  , pop_data = dat_BHF_census
  , pop_area_size = vec_pop_size
  , pop_domains = "ID_prov"
  , transformation = "no"
  , seed = 2022
  , MSE = T
)
## [1] "More SAE methods for full population data and transformations are offered in the R package emdi."
BHF_model_full_unoptimized$MSE$Mean %>% mean()
## [1] 0.2790526

step function with intercept

library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:emdi':
## 
##     step
## The following object is masked from 'package:stats':
## 
##     step
### model selection
full_model <- lm(bhf_formula,data = sample)
bhf_formula_intercept <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + 
    ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + 
    ocu_unskilled + sex_male + read_knowing + urbrur_urban + 
    health_insurance_yes + interior_plastered_yes + v04_revoq_Missing + 
    car_yes + water_heater_yes + kitchen_yes + (1|ID_prov)

lmer_model <- lmer(bhf_formula_intercept,data = sample)
# lmer_model %>% summary()

step(lmer_model)
## Backward reduced random-effect table:
## 
##               Eliminated npar  logLik   AIC    LRT Df Pr(>Chisq)    
## <none>                     21 -6068.9 12180                         
## (1 | ID_prov)          0   20 -6079.0 12198 20.159  1  7.125e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                        Eliminated Sum Sq Mean Sq NumDF  DenDF  F value
## v04_revoq_Missing               1   33.3    33.3     1 2240.7   2.7201
## p26_edad                        0 5541.1  5541.1     1 2241.3 451.6769
## ocu_military                    0   83.8    83.8     1 2222.2   6.8312
## ocu_manager                     0  104.2   104.2     1 2239.5   8.4936
## ocu_professional                0 3637.3  3637.3     1 2231.0 296.4944
## ocu_technician                  0  521.3   521.3     1 2217.6  42.4959
## ocu_adminSupport                0  178.7   178.7     1 2224.9  14.5638
## ocu_serviceSales                0   59.7    59.7     1 2231.4   4.8659
## ocu_agriculture                 0  514.7   514.7     1 2236.7  41.9543
## ocu_unskilled                   0   77.3    77.3     1 2221.1   6.3046
## sex_male                        0  193.5   193.5     1 2218.0  15.7743
## read_knowing                    0 4182.1  4182.1     1 2235.8 340.9007
## urbrur_urban                    0  127.0   127.0     1 1222.8  10.3512
## health_insurance_yes            0  315.4   315.4     1 2242.0  25.7082
## interior_plastered_yes          0  311.8   311.8     1 2107.4  25.4185
## car_yes                         0   97.7    97.7     1 2239.7   7.9621
## water_heater_yes                0  109.8   109.8     1 2237.5   8.9509
## kitchen_yes                     0  114.0   114.0     1 2241.8   9.2940
##                           Pr(>F)    
## v04_revoq_Missing      0.0992343 .  
## p26_edad               < 2.2e-16 ***
## ocu_military           0.0090184 ** 
## ocu_manager            0.0035994 ** 
## ocu_professional       < 2.2e-16 ***
## ocu_technician         8.742e-11 ***
## ocu_adminSupport       0.0001392 ***
## ocu_serviceSales       0.0274934 *  
## ocu_agriculture        1.145e-10 ***
## ocu_unskilled          0.0121132 *  
## sex_male               7.364e-05 ***
## read_knowing           < 2.2e-16 ***
## urbrur_urban           0.0013279 ** 
## health_insurance_yes   4.297e-07 ***
## interior_plastered_yes 5.009e-07 ***
## car_yes                0.0048189 ** 
## water_heater_yes       0.0028037 ** 
## kitchen_yes            0.0023259 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes + (1 | ID_prov)

Suggested model by step funktion bhf_formula_optimized <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes + (1 | ID_prov)

bhf_formula_optimized <-  aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes

BHF_model_full <-  NER_Trafo(fixed = bhf_formula_optimized
  ,smp_domains = "ID_prov"
  , smp_data = dat_BHF_sample
  , pop_data = dat_BHF_census
  , pop_area_size = vec_pop_size
  , pop_domains = "ID_prov"
  , transformation = "no"
  , seed = 2022
  , MSE = T
)
## [1] "More SAE methods for full population data and transformations are offered in the R package emdi."
BHF_model_full$MSE$Mean %>% mean()
## [1] 0.2792847

diagnostics

p_dig_BHF <- plot(BHF_model_full)

## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?

## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?

## Press [enter] to continue

p_dig_BHF_comp <- saeTrafo::compare_plot(model = BHF_model_full,direct = direct,MSE = T,CV = T)

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

pretty diagnostics

FH

library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
## 
##     stamp
p1 <- p_dig_FH$density_res

p2 <- p_dig_FH$density_res +
  # scale_color_manual(values = c("red")) +
  # scale_fill_manual(values = c("red")) +
  geom_density(color = "#FF8552",fill = "#FF8552",alpha = .15, linewidth = .4) +
   # geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  theme_minimal() +
    labs(
    title = "FH - Residual Assumption",
    # subtitle = "Fay–Herriot model",
    x = "standardized Residuals",
    y = "Density",
    # caption = "Source: household survey + census"
  )

p3 <- p_dig_FH$density_ran

p4 <- p_dig_FH$density_ran +
  # scale_color_manual(values = c("red")) +
  # scale_fill_manual(values = c("red")) +
  geom_density(color = "#FF8552",fill = "#FF8552",alpha = .15, linewidth = .4) +
   # geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  theme_minimal() +
    labs(
    title = "FH - Randome Effects Assumption",
    # subtitle = "Fay–Herriot model",
    x = "standardized randome effects",
    y = "Density",
    # caption = "Source: household survey + census"
  )

plot_grid(p1,p2,p3,p4,nrow = 2,ncol = 2)

p5 <- p_dig_FH_comp$scatter_FH

p6 <- p_dig_FH_comp$scatter_FH +
  geom_smooth(aes(color = "Reg. line"), method = "lm", se = FALSE) +
  scale_color_manual(values = c("Reg. line" = "#9A031E", "Intersection" = "#FF8552")) +
  theme_minimal() +
  labs(
    title = "direct vs fh estimates",
    x = "direct",
    y = "fh-model",
    color = ""
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p7 <- p_dig_FH_comp$line_FH

p8 <- p_dig_FH_comp$line_FH +
  # Override layer colors to be mapped instead of fixed
  # geom_line(aes(color = c("Direct", "Model-based"))) +
  # geom_line(size = .8,alpha = .3) +
  # geom_point(size = 1, alpha = .5) +
  scale_color_manual(values = c("#9A031E","#FF8552")) +
  theme_minimal() +
  labs(
    title = "Direct vs FH estimates ordered by Domain size",
    x = "Domain - ordered by decreasing MSE of Direct",
    y = "mean years of schooling estimate",
    color = "Method"
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p9 <- p_dig_FH_comp$ordered_MSE_FH

p10 <- p_dig_FH_comp$ordered_MSE_FH +
  scale_color_manual(values = c("#9A031E","#FF8552")) +
  theme_minimal() +
    labs(
    title = "MSE: direct vs FH",
    # subtitle = "Fay–Herriot model",
    x = "Domain - ordered by increasing MSE of Direct",
    y = "MSE",
    # caption = "Source: household survey + census"
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p5,p6,p7,p8,p9,p10,nrow = 3,ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'

plot_grid_fh <- plot_grid(p2,p4,p8,p10,nrow = 4)
plot_grid_fh

ggsave("../output/diagnostics_fh.svg",
       plot = plot_grid_fh,
       width = 6,      # width in inches
       height = 16,    # height in inches; make it long enough for 5 plots
       units = "in",   # units for width/height
       device = "svg")

ggsave("../output/diagnostics_fh.png", 
       plot = plot_grid_fh,
       width = 6,      # width in inches
       height = 16,    # height in inches; make it long enough for 5 plots
       units = "in",   # units for width/height
       dpi = 300)

BHF

p1 <- p_dig_BHF$density_res

p2 <- p_dig_BHF$density_res +
  # scale_color_manual(values = c("red")) +
  # scale_fill_manual(values = c("red")) +
  geom_density(color = "#7ca982",fill = "#7ca982",alpha = .15, linewidth = .4) +
   # geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  theme_minimal() +
    labs(
    title = "BHF - Residual Assumption",
    # subtitle = "Fay–Herriot model",
    x = "standardized Residuals",
    y = "Density",
    # caption = "Source: household survey + census"
  )

p3 <- p_dig_BHF$density_ran

p4 <- p_dig_FH$density_ran +
  # scale_color_manual(values = c("red")) +
  # scale_fill_manual(values = c("red")) +
  geom_density(color = "#7ca982",fill = "#7ca982",alpha = .15, linewidth = .4) +
   # geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  theme_minimal() +
    labs(
    title = "BHF - Randome Effects Adsumption",
    # subtitle = "Fay–Herriot model",
    x = "standardized randome effects",
    y = "Desnity",
    # caption = "Source: household survey + census"
  )

plot_grid(p1,p2,p3,p4,nrow = 2,ncol = 2)

p5 <- p_dig_BHF_comp$scatter_Mean

p6 <- p_dig_BHF_comp$scatter_Mean +
  geom_smooth(aes(color = "Reg. line"), method = "lm", se = FALSE) +
  scale_color_manual(values = c("Reg. line" = "#4e6b57", "Intersection" = "#b5c5b4")) +
  theme_minimal() +
  labs(
    title = "direct vs BHF estimates",
    x = "direct",
    y = "BHF-Mode",
    color = ""
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p7 <- p_dig_BHF_comp$line_Mean

p8 <- p_dig_BHF_comp$line_Mean +
  # Override layer colors to be mapped instead of fixed
  # geom_line(aes(color = c("Direct", "Model-based"))) +
  # geom_line(aes(size = .8,alpha = .3)) +
  # geom_point(aes(size = 1, alpha = .2)) +
  scale_color_manual(values = c("#4e6b57","#b5c5b4")) +
  theme_minimal() +
  labs(
    title = "Direct vs BHF estimates ordered by Domain size",
    x = "Domain - ordered by decreasing MSE of direct estimation",
    y = "mean years of schooling estimate",
    color = "Method"
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p9 <- p_dig_BHF_comp$ordered_MSE_Mean

p10 <- p_dig_BHF_comp$ordered_MSE_Mean +
  scale_color_manual(values = c("#4e6b57","#b5c5b4")) +
  theme_minimal() +
    labs(
    title = "MSE: direct vs BHF",
    # subtitle = "Fay–Herriot model",
    x = "Domain - ordered by increasing MSE of direct estimation",
    y = "MSE",
    # caption = "Source: household survey + census"
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p5,p6,p7,p8,p9,p10,nrow = 3,ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'

plot_grid_bhf <- plot_grid(p2,p4,p8,p10,nrow = 5)
plot_grid_bhf

ggsave("../output/diagnostics_bhf.svg", 
       plot = plot_grid_bhf,
       width = 6,      # width in inches
       height = 16,    # height in inches; make it long enough for 5 plots
       units = "in",   # units for width/height
       device = "svg")

ggsave("../output/disgnostics_bhf.png", 
       plot = plot_grid_bhf,
       width = 6,      # width in inches
       height = 16,    # height in inches; make it long enough for 5 plots
       units = "in",   # units for width/height
       dpi = 300)
# fh_model_full$MSE$Direct %>% mean()
# fh_model_full$MSE$FH %>% mean()
# BHF_model_full$MSE$Mean %>% mean()